import datetime
import time

import pandas as pd

def _elapsed_microseconds(action):
    """Run ``action()`` once and return the elapsed time in whole microseconds."""
    # perf_counter is monotonic, so the difference can never come out negative.
    started = time.perf_counter()
    action()
    return int(round((time.perf_counter() - started) * 1e6))


def select_demo(path="../datas/movielens-1m/users.dat"):
    """Show the memory and groupby effect of storing ``Gender`` as a category.

    Reads a ``::``-separated users file, prints ``DataFrame.info`` for the raw
    frame and for a copy whose ``Gender`` column is converted to the
    ``category`` dtype, prints the per-gender counts, and finally prints the
    time (in microseconds) of one ``groupby("Gender").size()`` on each frame:
    first the raw frame, then the categorical one.

    Args:
        path: Location of the users file. Each row is expected to hold five
            ``::``-separated fields (UserID, Gender, Age, Occupation,
            Zip-code) with no header row.

    Returns:
        None. All results are written to standard output.
    """
    df = pd.read_csv(
        path,
        sep="::",
        engine="python",  # a multi-character separator needs the python engine
        header=None,
        names=["UserID", "Gender", "Age", "Occupation", "Zip-code"],
    )
    # info() prints its report itself and returns None, so it is not wrapped
    # in print() (that would emit a stray "None" line).
    df.info(memory_usage="deep")
    print("--------------------------------------")

    # 2. Use the categorical dtype to reduce storage.
    # Work on a copy: assigning into an alias of ``df`` would convert the
    # original column too, and the comparison below would time the same data.
    df_cat = df.copy()
    df_cat["Gender"] = df_cat["Gender"].astype("category")
    df_cat.info(memory_usage="deep")
    print("------------------------------------")
    print(df_cat["Gender"].value_counts())

    # Timing of the raw column.
    print(_elapsed_microseconds(lambda: df.groupby("Gender").size()))

    # Timing of the categorical column. ``observed`` is passed explicitly so
    # the call does not depend on the pandas-version-specific default for
    # categorical groupers.
    print(
        _elapsed_microseconds(
            lambda: df_cat.groupby("Gender", observed=True).size()
        )
    )

# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    select_demo()
